import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')
# Load the exported RFM table from CSV, then preview its first rows.
rfm = pd.read_csv(filepath_or_buffer="rfm_nextstep_export.csv")
rfm.head()
| | customer_id | Recency | Frequency | Monetary |
|---|---|---|---|---|
| 0 | 1 | 102 | 5 | 831.30 |
| 1 | 2 | 34 | 4 | 1840.77 |
| 2 | 3 | 297 | 3 | 136.50 |
| 3 | 4 | 527 | 2 | 349.70 |
| 4 | 5 | 112 | 11 | 2273.00 |
# Report the DataFrame dimensions as a (rows, columns) tuple.
rfm.shape
(1283707, 4)
"""
Scoring suivant une logique de 1 à 5 pour F et M
et de 5 à 1 pour R (car plus la récence est basse, plus vous êtes un bon client, contrairement aux deux autres)
"""
# Score each RFM dimension into quintiles.
# Frequency and Monetary are labelled 1..5 (higher value -> higher score).
# Recency is labelled 5..1: a lower recency gets the higher score.
rfm['R'] = pd.qcut(rfm['Recency'], 5, labels=[5, 4, 3, 2, 1])
# Frequency is ranked (ties broken by order of appearance) before binning.
# NOTE(review): presumably done to keep quintile edges unique when many
# customers share the same frequency -- confirm.
rfm['F'] = pd.qcut(rfm['Frequency'].rank(method="first"), 5, labels=[1, 2, 3, 4, 5])
rfm['M'] = pd.qcut(rfm['Monetary'], 5, labels=[1, 2, 3, 4, 5])

# Combined score: the three score digits concatenated as a string (e.g. "344").
# Column-wise concatenation is vectorized; a row-wise agg(''.join, axis=1)
# would invoke a Python-level join once per row.
rfm['RFM_Score'] = (
    rfm['R'].astype(str) + rfm['F'].astype(str) + rfm['M'].astype(str)
)
rfm.head()
| | customer_id | Recency | Frequency | Monetary | R | F | M | RFM_Score |
|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 102 | 5 | 831.30 | 3 | 4 | 4 | 344 |
| 1 | 2 | 34 | 4 | 1840.77 | 5 | 4 | 5 | 545 |
| 2 | 3 | 297 | 3 | 136.50 | 2 | 3 | 2 | 232 |
| 3 | 4 | 527 | 2 | 349.70 | 1 | 2 | 3 | 123 |
| 4 | 5 | 112 | 11 | 2273.00 | 3 | 5 | 5 | 355 |
# Plot the distribution of the combined RFM scores as a histogram.
fig = px.histogram(
    data_frame=rfm,
    x="RFM_Score",
    title="Distribution des scores RFM",
)
fig.show()